# Table_2.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.

library(boot, include.only = c("boot", "boot.ci"))
library(car,  include.only = c("lht", "Recode"))
library(Bullock)
library(dplyr)     # for %>%, n_distinct()
library(here)      # for here::here()
library(Hmisc)     # for llist()
library(lmtest)    # for coeftest()
library(magrittr)  # for %T>%
library(purrr)     # for map_int()
library(survey)    # for svydesign(), svyglm()


# SET WORKING DIRECTORY AND SOURCE FILES
source(here::here("R/SSI_2017_analysis.R"))


# SET BOOLEANS
# Table   2: MODELS_WITH_CONTROLS is FALSE, ONLY...CONTROL is TRUE,  ONLY...WEIGHTS is TRUE,  USE_WEIGHTS is TRUE
# Table  A3: MODELS_WITH_CONTROLS is FALSE, ONLY...CONTROL is TRUE,  ONLY...WEIGHTS is TRUE,  USE_WEIGHTS is FALSE
# Table  A4: MODELS_WITH_CONTROLS is FALSE, ONLY...CONTROL is FALSE, ONLY...WEIGHTS is FALSE, USE_WEIGHTS is FALSE
# Table A12: MODELS_WITH_CONTROLS is TRUE,  ONLY...CONTROL is TRUE,  ONLY...WEIGHTS is TRUE,  USE_WEIGHTS is TRUE
if (interactive()) {
  MODELS_WITH_CONTROLS                         <- FALSE
  ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS <- TRUE
  ONLY_SUBJECTS_WHO_HAVE_WEIGHTS               <- TRUE
  USE_WEIGHTS                                  <- TRUE
  
  CREATE_SCREENER_TABLES                       <- TRUE
} else {
  clArgs <- commandArgs(trailingOnly = TRUE)
  if (length(clArgs) <= 0) { 
    writeLines(paste0(
      "clArgs has length ", length(clArgs), 
      ".  This length is less than 1, so the script is stopping."))
    stop()
  }
  MODELS_WITH_CONTROLS                         <- as.logical(clArgs[1])
  ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS <- as.logical(clArgs[2])
  ONLY_SUBJECTS_WHO_HAVE_WEIGHTS               <- as.logical(clArgs[3])
  USE_WEIGHTS                                  <- as.logical(clArgs[4])
  
  CREATE_SCREENER_TABLES                       <- as.logical(clArgs[5])
}


# SET OUTPUT PATH AND FILENAME
filenameStem <- case_when(
  MODELS_WITH_CONTROLS ~ here::here('float_output/Table_A12'),
  !MODELS_WITH_CONTROLS && 
    ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS &&
    !USE_WEIGHTS ~ here::here('float_output/Table_A3'),
  !MODELS_WITH_CONTROLS && 
    !ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS &&
    !USE_WEIGHTS ~ here::here('float_output/Table_A4'),
  TRUE ~ here::here('float_output/Table_2'))



# **************************************************************************
# ANALYZE EFFECT OF NUMBER OF RESPONSE OPTIONS ####
# **************************************************************************
# Regress correct answers on the number of response options offered, pooling
# over all 12 questions for which we varied the response options.
#   Per the last page of the pre-analysis plan, we omit subjects who are   
# missing data on covariates. We use filter_at() to do this.
#
# # This is regression (1) in the tables.
numROs_dfForEstimation <- data.frame(
  correct, 
  numROs,  # NA for open-ended respondent-questions
  weights        = data$weight,
  question       = rep(factor(gsub('_numROs', '', numROs_names)), each = length(data$age)), 
  psid           = data$psid,
  ID             = 1:length(data$age),
  age            = data$age,
  age.sq         = data$age^2,
  educ           = data$educ,
  female         = data$female,
  PID            = data$PID,
  race           = data$raceCoalesced,
  state          = data$state,
  Pence          = data$Pence,
  SenateTerm     = data$SenateTerm,
  Yellen         = data$Yellen,
  placeboCorrect = data$placeboCorrect,
  passedScreenerMedia  = data$passedScreenerMedia,
  passedScreenerSalary = data$passedScreenerSalary,
  screenerSum    = data$screenerSum) %>%
  { if (USE_WEIGHTS || ONLY_SUBJECTS_WHO_HAVE_WEIGHTS) filter(., !is.na(weights)) else . } %>%
  { if (ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS)  filter_at(., vars(age:Yellen), all_vars(!is.na(.))) else . } 


# Set up the survey-weighted design object. Per Thomas Lumley's advice at 
# https://stats.stackexchange.com/a/410204/9618, we use "ids = ~ID" to 
# cluster SEs by respondent. 
numROsDO <- numROs_dfForEstimation %>%  # DO = "design object"
  { svydesign(
      ids     = ~ID,
      data    = ., 
      weights = if (USE_WEIGHTS) numROs_dfForEstimation$weight else NULL
    ) 
  }

numROs_lm1 <- svyglm(
  formula     = correct ~ as.factor(numROs) + question, 
  design      = numROsDO,
  singular.ok = FALSE)

numROs_lm2   <- svyglm(
  formula     = correct ~ as.factor(numROs) + question + age + age.sq + educ + female + PID + race + state + Pence + SenateTerm + Yellen, 
  design      = numROsDO,
  singular.ok = FALSE)



# **************************************************************************
# ANALYZE EFFECT OF QUESTION DIFFICULTY ####
# **************************************************************************
# This is regression (2) in the tables.
diff_dfForEstimation <- data.frame(
  correct_diff, 
  diffTF, 
  weights        = data$weight,
  question       = rep(factor(gsub('_diffTF', '', diffTF_names)), each = length(data$age)),   
  ID             = 1:length(data$age),
  age            = data$age,
  age.sq         = data$age^2,
  educ           = data$educ,
  female         = data$female,
  PID            = data$PID,
  race           = data$raceCoalesced,
  state          = data$state,
  Pence          = data$Pence,
  SenateTerm     = data$SenateTerm,
  Yellen         = data$Yellen,
  placeboCorrect = data$placeboCorrect,
  passedScreenerMedia  = data$passedScreenerMedia,
  passedScreenerSalary = data$passedScreenerSalary,
  screenerSum    = data$screenerSum) %>%
  { if (USE_WEIGHTS || ONLY_SUBJECTS_WHO_HAVE_WEIGHTS) filter(., !is.na(weights)) else . } %>%
  { if (ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS) filter_at(., vars(age:Yellen), all_vars(!is.na(.))) else . } 

diffDO <- diff_dfForEstimation %>%        # DO = "design object"
  { svydesign(
      ids     = ~ID, 
      data    = ., 
      weights = if (USE_WEIGHTS) diff_dfForEstimation$weight else NULL
    ) 
  }

diff_lm1   <- svyglm(
  formula     = correct_diff ~ as.factor(diffTF) + question, 
  design      = diffDO,
  singular.ok = FALSE)

diff_lm2   <- svyglm(
  formula     = correct_diff ~ as.factor(diffTF) + question + age + age.sq + educ + female + PID + race + state + Pence + SenateTerm + Yellen, 
  design      = diffDO,
  singular.ok = FALSE)



# **************************************************************************
# POOLED ANALYSIS OF OPEN- VS. CLOSED-ENDED QUESTIONS ####
# **************************************************************************
# This is regression (3) in the tables.
correct_fullQuestionsOnly   <- unlist(lapply(varNames_correct,   get, envir = as.environment(data)))
condition_fullQuestionsOnly <- unlist(lapply(varNames_condition, get, envir = as.environment(data))) %>%
  relevel(ref = 'OE')  # open-ended is the reference category
main_dfForEstimation <- data.frame(
    correct_fullQuestionsOnly, 
    condition_fullQuestionsOnly, 
    weights        = data$weight,
    question       = rep(factor(gsub('_correct', '', varNames_correct)), each = length(data$age)), 
    ID             = 1:length(data$age),
    age            = data$age,
    age.sq         = data$age^2,
    educ           = data$educ,
    female         = data$female,
    PID            = data$PID,
    race           = data$raceCoalesced,
    state          = data$state,
    Pence          = data$Pence,
    SenateTerm     = data$SenateTerm,
    Yellen         = data$Yellen,
    placeboCorrect = data$placeboCorrect,
    passedScreenerMedia  = data$passedScreenerMedia,
    passedScreenerSalary = data$passedScreenerSalary,
    screenerSum    = data$screenerSum) %>%
  { if (USE_WEIGHTS || ONLY_SUBJECTS_WHO_HAVE_WEIGHTS) filter(., !is.na(weights)) else . } %>%
  { if (ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS) filter_at(., vars(age:Yellen), all_vars(!is.na(.))) else . } 

mainDO <- main_dfForEstimation %>%        # DO = "design object"
  { svydesign(
      ids     = ~ID, 
      data    = ., 
      weights = if (USE_WEIGHTS) main_dfForEstimation$weight else NULL
    ) 
  }

main_dfForEstimation_subset <- subset(
    x      = main_dfForEstimation, 
    subset = condition_fullQuestionsOnly %in% qw("3:easy 5:hard OE"))
main_dfForEstimation_subset$condition  <- droplevels(main_dfForEstimation_subset$condition)

mainSubsetDO <- main_dfForEstimation_subset %>%        # DO = "design object"
  { svydesign(
      ids     = ~ID, 
      data    = ., 
      weights = if (USE_WEIGHTS) main_dfForEstimation_subset$weight else NULL
    ) 
  }

main_lm1 <- svyglm(correct_fullQuestionsOnly ~ condition_fullQuestionsOnly + question,  
  design      = mainSubsetDO,
  singular.ok = FALSE)
main_lm2   <- update(
  object  = main_lm1,
  formula = . ~ . + age + age.sq + educ + female + PID + race + state + Pence + SenateTerm + Yellen)



# **************************************************************************
# CORRECT-RESPONSE RATES: OPEN-ENDED VS. AVERAGE OF CLOSED-ENDED #### 
# **************************************************************************
# This is regression (4) in the tables.
openVersusAvgClosed_dfForEstimation <- main_dfForEstimation %>%
  mutate(
    condition = Recode(.$condition, '"OE"="OE"; NA=NA; else = "closed"') %>%
      relevel("OE")
  )

openVersusAvgClosedDO <- openVersusAvgClosed_dfForEstimation %>%  # DO = "design object"
  { svydesign(
      ids     = ~ID, 
      data    = ., 
      weights = if (USE_WEIGHTS) openVersusAvgClosed_dfForEstimation$weight else NULL
    ) 
  }

openVersusAvgClosed_lm1 <- svyglm(
  correct_fullQuestionsOnly ~ condition + question,  
  design      = openVersusAvgClosedDO,
  singular.ok = FALSE)

openVersusAvgClosed_lm2 <- update(
  object  = openVersusAvgClosed_lm1,
  formula = . ~ . + age + age.sq + educ + female + PID + race + state + Pence + SenateTerm + Yellen)



# **************************************************************************
# CREATE A REGRESSION TABLE FOR OUTPUT #### 
# **************************************************************************
# The regression table reports results from the six regressions estimated
# above. [2019 09 29]
if (MODELS_WITH_CONTROLS) { 
  regList <- llist(numROs_lm2, diff_lm2, main_lm2, openVersusAvgClosed_lm2)    
} else {
  regList <- llist(numROs_lm1, diff_lm1, main_lm1, openVersusAvgClosed_lm1)
}

regResultsTable <- regTable(regList, rowsToKeep = 'numROs|diffTF|condition|Intercept')

rownames(regResultsTable) <- c(
  'Intercept',
  'five response options',
  'difficult response options',
  'three easy response options',
  'five difficult response options',
  'closed-ended response options')[1:nrow(regResultsTable)]



# **************************************************************************
# CREATE FOOTER ROWS AND CAPTION ####
# **************************************************************************
dfList <- llist(
  numROs_dfForEstimation,
  diff_dfForEstimation,
  main_dfForEstimation_subset,
  openVersusAvgClosed_dfForEstimation)

getNumSubjects <- function (lmObjName, df = NULL) {
  # We want to know the number of subjects providing data for each
  # regression. But the regressions are "stacked"; that is, they contain 
  # multiple observations for each subject. So we cannot use nobs() to 
  # learn the number of subjects: nobs() will return the total number of
  # observations, not the number of subjects.  [2020 01 17]
  
  # We get, from the data frame that we passed to lm(), only the rows that 
  # were actually used to estimate the model.
  lmObj <- get(lmObjName)
    
  # Get the data frame to check.
  if (is.null(df)) {
    dfName    <- ifelse(
      grepl('main', lmObjName),
      sub('lm\\d', 'dfForEstimation_subset', lmObjName),
      sub('lm\\d', 'dfForEstimation', lmObjName))
    df        <- get(dfName) 
  }
    
  # Keep only the rows in the data frame that we used. Get the number of 
  # distinct subjects.
  rowNums <- rownames(lmObj$model)  # rowNums is a character vector
  df_onlyUsedRows <- df[rowNums, ]  # rowNums remains a character vector
  n_distinct(df_onlyUsedRows$ID)
}

regList.R2          <- sapply(regList, function (x) summary.lm(x)$r.squared %>% round(., 2))
regList.SER         <- sapply(regList, function (x) sd(resid(x)) %>% round(., 2))
regList.N_questions <- qw("12 6 6 6") 
regList.N_subjects  <- map_int(names(regList), getNumSubjects)
regList.N           <- sapply(regList, nobs)

latexColNames <- paste0('(', 1:length(regList), ')')  # "(1)", "(2)", etc.
latexFooterRows <- list(  
  c('$R^2$',                    regList.R2),
  c('Std. error of regression', regList.SER),
  c('Number of questions',      regList.N_questions),
  c('Number of subjects',       regList.N_subjects),
  c('Number of observations',   regList.N),
  c('Includes controls',        rep(qw("no yes"), 3))
)
latexFooterRows <- latexFooterRows[-length(latexFooterRows)]
latexFooterRows[[1]] <- gsub('^0\\.', '.', latexFooterRows[[1]])  # eliminate 0 before decimal point
latexFooterRows[[2]] <- gsub('^0\\.', '.', latexFooterRows[[2]])  # eliminate 0 before decimal point

latexCaption <- paste(
  '\\textit{Effects of number and difficulty of response options.}',
  'Cell entries are OLS estimates and standard errors.',
  'The outcomes are answers to the questions: correct~=~1, incorrect~=~0.',
  'The listed predictors are also coded 0 or~1.',
  'All regressions include fixed effects for each question.'
)
if (!USE_WEIGHTS && ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS) {
  latexCaption <- sub(
    "response options",
    "response options (unweighted data)",
    latexCaption)
} else if (!USE_WEIGHTS && !ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS) {
  latexCaption <- sub(
    "response options",
    "response options (unweighted data, larger sample)",
    latexCaption)
} else if (MODELS_WITH_CONTROLS) {
  latexCaption <- sub(
    "number and difficulty of response options",
    "number and difficulty of response options (analysis including control variables)",
    latexCaption)
} else if (!MODELS_WITH_CONTROLS && !ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS) {
  latexCaption <- sub("(Effects of number and difficulty of response options)", "\\1 (analysis including all subjects)", latexCaption) 
}
latexCaption <- paste(
  latexCaption,
  "The baseline condition for column (1) is ``three response options''; for column~(2), ``easy response options''; and for columns (3) and~(4), ``open-ended questions.''",
  "Standard errors are clustered at the respondent level.")  



# **************************************************************************
# CREATE LATEX TABLE ####
# **************************************************************************
regLatexTable <- latexTable(
  mat                     = regResultsTable,
  colNames                = latexColNames,
  footerRows              = latexFooterRows,
  headerFooter            = TRUE,
  decimalPlaces           = 2,
  spacerColumns           = seq(0, ncol(regResultsTable) - 1, by = 2),  # before rowname and after each SE column
  horizOffset             = '-.375in',
  caption                 = latexCaption,
  commandName             = case_when(
    MODELS_WITH_CONTROLS ~ 'TabAppendixMainResultsWithControls',
    !MODELS_WITH_CONTROLS && 
      ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS &&
      !USE_WEIGHTS ~ 'TabAppendixMainResultsWithoutWeights',
    !MODELS_WITH_CONTROLS && 
      !ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS &&
      !USE_WEIGHTS ~ 'TabAppendixMainResultsWithoutWeightsMoreSubjects',
    !ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS ~ 'TabAppendixMainResultsWithAllSubjects',
    TRUE                 ~ 'TabMainResults'), 
  callCommand             = FALSE)


# Insert vertical space between footer rows.  [2020 01 17]
ind <- min(grep('Number of', regLatexTable))
regLatexTable <- c(
  regLatexTable[1:(ind-1)], 
  "        \\addlinespace[.15in]",
  regLatexTable[ind:length(regLatexTable)]
)

# Write the LaTeX table to a file.
latexTablePDF(
  latexTable         = list(regLatexTable), 
  firstPageEmpty     = FALSE,  
  continuedFloat     = FALSE,  
  container          = FALSE,  # FALSE if inserting LaTeX tables into appendix of my paper
  outputFilenameStem = filenameStem,
  overwriteExisting  = TRUE,
  writePDF           = FALSE,
  writeTex           = TRUE,
  openPDFOnExit      = FALSE)



# **************************************************************************
# BOOTSTRAPPED CONFIDENCE INTERVAL FOR THE DIFFERENCE IN DIFFERENCES ####
# **************************************************************************
# Our main result is a difference of differences: 
# (%correct 3:easy - %correct 5:hard) - (%correct 5:hard - %correct open-ended).
# See our pre-analysis plan for details.  
#
# If we were estimating an ordinary difference of differences, we could just 
# use OLS to get a standard error for the difference of differences. But
# OLS won't suffice for the model that we've estimated, in which no single 
# estimate represents the difference of differences.
if (interactive() & !sourcing()) {
  bootFunc  <- function(main_dfForEstimation, indices) {
    main_dfForEstimation <- main_dfForEstimation[indices, ]
    mainSubsetDO <- main_dfForEstimation %>%        # DO = "design object"
      { svydesign(
          ids     = ~ID, 
          data    = ., 
          weights = if (USE_WEIGHTS) main_dfForEstimation_subset$weight else NULL
        ) 
      }
    main_lm1 <- svyglm(correct_fullQuestionsOnly ~ condition_fullQuestionsOnly + question,  
      design      = mainSubsetDO,
      singular.ok = FALSE)
    coeftest(main_lm1)['condition_fullQuestionsOnly3:easy', 'Estimate'] - 2*coeftest(main_lm1)['condition_fullQuestionsOnly5:hard', 'Estimate']
  }  
  bootResult  <- boot::boot(
    data      = main_dfForEstimation_subset, 
    statistic = bootFunc, 
    R         = 1000)
  CI_lm1 <- boot::boot.ci(bootResult, type = 'perc')
  CI_lm1
  
  bootfunc_lm2  <- function(main_dfForEstimation, indices) {
    main_dfForEstimation <- main_dfForEstimation[indices, ]
    mainSubsetDO <- main_dfForEstimation %>%        # DO = "design object"
      { svydesign(
          ids     = ~ID, 
          data    = ., 
          weights = if (USE_WEIGHTS) main_dfForEstimation_subset$weight else NULL
        ) 
      }
    main_lm2 <- svyglm(correct_fullQuestionsOnly ~ condition_fullQuestionsOnly + question + age + age.sq + educ + female + PID + race + state + Pence + SenateTerm + Yellen,
      design      = mainSubsetDO,
      singular.ok = FALSE)
    coeftest(main_lm2)['condition_fullQuestionsOnly3:easy', 'Estimate'] - 2*coeftest(main_lm2)['condition_fullQuestionsOnly5:hard', 'Estimate']
  }  
  bootResult_lm2  <- boot(
    data      = main_dfForEstimation_subset, 
    statistic = bootfunc_lm2, 
    R         = 1000)
  CI_lm2 <- boot.ci(bootResult_lm2, type = 'perc')
  CI_lm2
  
  # A p-value for the main result, if we want it.
  lht(main_lm1, "condition_fullQuestionsOnly3:easy - 2*condition_fullQuestionsOnly5:hard = 0")  
}



# **************************************************************************
# MAKE TABLES THAT EXCLUDE SUBJECTS WHO FAILED SCREENER QUESTIONS ####
# **************************************************************************
# Tables A6-A9 in the appendix.
if ( 
  CREATE_SCREENER_TABLES && 
  !MODELS_WITH_CONTROLS &&
  ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS &&
  USE_WEIGHTS) {

  regListForScreeners_original <- llist(
    numROs_lm1, diff_lm1, main_lm1, openVersusAvgClosed_lm1)

  
  # EXCLUDE SUBJECTS ON THE BASIS OF THEIR ANSWERS TO SCREENER QUESTIONS
  # Each of these lists has four regression objects. Each object corresponds
  # to a column in a regression table. (We are building four regression 
  # tables, each of which has four columns.)  
  regListForScreeners_noMedia <- sapply(        # exclude Ss who failed the "media" screener
    regListForScreeners_original, 
    function (x) update(x, subset = passedScreenerMedia))
  regListForScreeners_noSalaries <- sapply(     # exclude Ss who failed the "justices' salaries" screener
    regListForScreeners_original, 
    function (x) update(x, subset = passedScreenerSalary)) 
  regListForScreeners_excludeEither <- sapply(  # exclude Ss who failed either screener question
    regListForScreeners_original, 
    function (x) update(x, subset = passedScreenerMedia & passedScreenerSalary)) 
  regListForScreeners_excludeBoth <- sapply(    # exclude Ss who failed both screener questions
    regListForScreeners_original, 
    function (x) update(x, subset = passedScreenerMedia | passedScreenerSalary))
  
  # Build a list of lists.
  regListsForScreeners <- mget(x = ls(pat = '^regListForScreeners_(exclude|no)'))  
  
  
  # CREATE REGRESSION TABLES FOR OUTPUT
  # regResultsTables_screeners is a list of four matrices -- four regression
  # tables
  regResultsTables_screeners <- lapply(
    regListsForScreeners,
    regTable,
    rowsToKeep = 'numROs|diffTF|condition|Intercept')
  regResultsTables_screeners <- lapply(
    regResultsTables_screeners, 
    function (x) {
      rownames(x) <- c(
        'Intercept',
        'five response options',
        'difficult response options',
        'three easy response options',
        'five difficult response options',
        'closed-ended response options')[1:nrow(x)]
      x
    })


  # CREATE FOOTER ROWS AND CAPTIONS
  # Each of the map() commands returns a list with four elements, 
  # corresponding to the four regression tables that we're creating. Each of  
  # these list-elements in turn has four columns, each one corresponding to  
  # a column in one of the regression tables.  [2020 07 25]  
  regResults.R2 <- map(
    regListsForScreeners,
    function (x) map_dbl(x, function (lmObj) { summary.lm(lmObj)$r.squared %>% round(., 2) } )
  )
  regResults.SER <- map(
    regListsForScreeners,
    function (x) map_dbl(x, function (lmObj) { sd(resid(lmObj)) %>% round(., 2) } )
    # function (x) map_dbl(x, function (lmObj) { summary.lm(lmObj)$sigma %>% round(., 2) } )
  )
  regResults.N_questions <- qw("12 6 6 6")  
  regResults.N_subjects <- map(
    regListsForScreeners,
    function (x) map_int(x, function (lmObj) { lmObj$data$ID %>% n_distinct } )
  )
  regResults.N <- map(
    regListsForScreeners,
    function (x) map_int(x, nobs)
  )

  latexFooterRowsList <- rep(list(NULL), 4)
  for (i in 1:4) {
    latexFooterRowsList[[i]] <- list(  
      c('$R^2$',                    regResults.R2[[i]]),
      c('Std. error of regression', regResults.SER[[i]]),
      c('Number of questions',      regResults.N_questions),
      c('Number of subjects',       regResults.N_subjects[[i]]),
      c('Number of observations',   regResults.N[[i]])
    ) 
  }
  latexFooterRowsList <- latexFooterRowsList %>%
    setNames(names(regResults.R2)) %>%
    map(function (x) { 
        x[[1]] <- gsub('^0\\.', '.', x[[1]])  # eliminate 0 before decimal point
        x[[2]] <- gsub('^0\\.', '.', x[[2]])  # eliminate 0 before decimal point
        x
    })
  captionPhrases <- list(
    media    = "excluding subjects who failed media screener",
    salaries = "excluding subjects who failed salaries screener",
    either   = "excluding subjects who failed either screener",
    both     = "excluding subjects who failed both screeners")

 
  # MAKE LATEX TABLES
  regLatexTableList <- rep(list(NULL), 4) %>% setNames(names(regResultsTables_screeners))
  for (myName in names(regLatexTableList)) {
    regLatexTableList[[myName]] <- latexTable(
      mat           = regResultsTables_screeners[[myName]],
      colNames      = latexColNames,
      footerRows    = latexFooterRowsList[[myName]],
      headerFooter  = TRUE,
      decimalPlaces = 2,
      spacerColumns = seq(0, ncol(regResultsTable) - 1, by = 2),  # before rowname and after each SE column
      horizOffset   = '-.375in',
      caption       = case_when(
        grepl("Media",    myName) ~ sub("response options", paste0("response options (", captionPhrases$media,    ")"), latexCaption),
        grepl("Salaries", myName) ~ sub("response options", paste0("response options (", captionPhrases$salaries, ")"), latexCaption),
        grepl("Either",   myName) ~ sub("response options", paste0("response options (", captionPhrases$either,   ")"), latexCaption),        
        grepl("Both",     myName) ~ sub("response options", paste0("response options (", captionPhrases$both,     ")"), latexCaption)),
      commandName   = case_when(
        grepl("Media",    myName) ~ "TabAppendixMainResultsScreenMedia",
        grepl("Salaries", myName) ~ "TabAppendixMainResultsScreenSalaries",
        grepl("Either",   myName) ~ "TabAppendixMainResultsScreenEither",        
        grepl("Both",     myName) ~ "TabAppendixMainResultsScreenBoth"),
      callCommand   = FALSE)
  }
  
  # Insert vertical space between footer rows.  
  regLatexTableList <- lapply(
    regLatexTableList, 
    function (myTable) {
      ind <- min(grep('Number of', myTable))
      c(
        myTable[1:(ind-1)], 
        "        \\addlinespace[.15in]",
        myTable[ind:length(myTable)]
      )
  })


  # EXPORT TABLES TO LaTeX
  latexTablePDF(
    latexTable         = regLatexTableList, 
    firstPageEmpty     = FALSE,  
    container          = FALSE,  # FALSE if inserting LaTeX tables into appendix of my paper
    outputFilenameStem = here::here('float_output/Tables_A6-A9'),
    overwriteExisting  = TRUE,
    writePDF           = FALSE,
    writeTex           = TRUE)
  
}



# **************************************************************************
# APPENDIX: MAIN REGRESSIONS WITHOUT EXCLUDING ANY CONDITIONS ####
# **************************************************************************
# Table A13 in the appendix.
if (
  !MODELS_WITH_CONTROLS &&
  ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS && 
  ONLY_SUBJECTS_WHO_HAVE_WEIGHTS &&
  USE_WEIGHTS) {

    main_lm1_withAllConditions <- update(main_lm1, design = mainDO)  # no control variables
    main_lm2_withAllConditions <- update(main_lm2, design = mainDO)  # with control variables
    
    
    # MAKE REGRESSION TABLE
    regList_appendix <- llist(main_lm1_withAllConditions, main_lm2_withAllConditions)
    regTable_appendix <- regTable(
      regList_appendix, 
      rowsToKeep = 'numROs|diffTF|condition|Intercept')
    rownames(regTable_appendix) <- c(
      'Intercept',
      'three super-easy response options',
      'three easy response options',
      'three difficult response options',
      'five super-easy response options',
      'five easy response options',
      'five difficult response options')
    
    
    # GET INFO FOR FOOTER ROWS
    dfList <- llist(main_dfForEstimation_subset)
    regList_appendix.R2          <- sapply(regList_appendix, function (x) summary.lm(x)$r.squared %>% round(., 2))
    regList_appendix.SER         <- sapply(regList_appendix, function (x) sd(resid(x)) %>% round(., 2))
    regList_appendix.N_questions <- qw("6 6")
    regList_appendix.N_subjects  <- map_int(names(regList_appendix), getNumSubjects, df = main_dfForEstimation)
    regList_appendix.N           <- sapply(regList_appendix, nobs)
    
    
    # SET UP TABLE FOOTER
    latexFooterRows_appendix <- list(  
      c('$R^2$',                    regList_appendix.R2),
      c('Std. error of regression', regList_appendix.SER),
      c('Number of questions',      regList_appendix.N_questions),
      c('Number of subjects',       regList_appendix.N_subjects),
      c('Number of observations',   regList_appendix.N),
      c('Includes controls',        qw("no yes"))
    )
    latexFooterRows_appendix[[1]] <- gsub('^0\\.', '.', latexFooterRows_appendix[[1]])  # eliminate 0 before decimal point
    latexFooterRows_appendix[[2]] <- gsub('^0\\.', '.', latexFooterRows_appendix[[2]])  # eliminate 0 before decimal point
    latexCaption_appendix <- paste(
      '\\textit{Effects of number and difficulty of response options (analyses including all conditions).}',
      'Cell entries are OLS estimates and standard errors.',
      'The outcomes are answers to the questions: correct~=~1, incorrect~=~0.',
      'The listed predictors are also coded 0 or~1.',
      'Both regressions include fixed effects for each question.',
      "The baseline condition for both regressions is ``open-ended questions.''"
    )
    
    
    # CREATE LATEX TABLE
    regLatexTable_appendix <- latexTable(
      mat                     = regTable_appendix,
      colNames                = lt_colNumbers(),
      footerRows              = latexFooterRows_appendix,
      headerFooter            = TRUE,
      horizOffset             = '-.375in',
      caption                 = latexCaption_appendix,
      commandName             = 'TabAppendixMainRegressionsWithAllConditions',
      callCommand             = FALSE)
    
    # Insert vertical space between footer rows.  [2020 01 17]
    ind <- min(grep('Number of', regLatexTable_appendix))
    regLatexTable_appendix <- c(
      regLatexTable_appendix[1:(ind-1)], 
      "        \\addlinespace[.15in]",
      regLatexTable_appendix[ind:length(regLatexTable_appendix)]
    )
    
    # Write the LaTeX table to a file.
    latexTablePDF(
      latexTable         = list(regLatexTable_appendix), 
      firstPageEmpty     = FALSE,  
      continuedFloat     = FALSE,  
      container          = FALSE,  # FALSE if inserting LaTeX tables into appendix
      outputFilenameStem = here::here('float_output/Table_A13'),
      overwriteExisting  = TRUE,
      writePDF           = FALSE,
      writeTex           = TRUE,
      openPDFOnExit      = FALSE)
}



# **************************************************************************
# AUXILIARY RESULTS ####
# **************************************************************************
if (interactive() & !sourcing()) {

  # Page 14, note 9: "Although we excluded some conditions..."  [2020 01 31]
  main_dfForEstimation_subset %>%
    group_by(ID) %>%
    count(condition_fullQuestionsOnly) %>%  # number of times each S was in each of three conditions
    dplyr::summarize(totalN = sum(n)) %>%   # total num. of times, out of 6, that S was in one of the three conditions
    { table(.$totalN) } %>%                 # how many subjects were in one of three conditions 1 time, 2 times, ..., 6 times
    sum                                     # 1,968 subjects were in one of the three conditions at least once
    

  # Page 13: "The difference of differences is 29 - 21 = 8 percentage points
  # [95% CI: 3%, 10%]."
  if (ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS && !MODELS_WITH_CONTROLS) {
    bootFunc  <- function(data, indices) {

      mainSubsetDO <- data[indices, ] %>%        # DO = "design object"
        { svydesign(
            ids     = ~ID, 
            data    = ., 
            weights = if (USE_WEIGHTS) main_dfForEstimation_subset$weight else NULL
          ) 
        }      
      main_lm1 <- svyglm(correct_fullQuestionsOnly ~ condition_fullQuestionsOnly + question,  
        design      = mainSubsetDO,
        singular.ok = FALSE)
      
      openVersusAvgClosed_dfForEstimation <- openVersusAvgClosed_dfForEstimation[indices, ]     
      openVersusAvgClosedDO <- openVersusAvgClosed_dfForEstimation %>%  # DO = "design object"
        { svydesign(
            ids     = ~ID, 
            data    = ., 
            weights = if (USE_WEIGHTS) openVersusAvgClosed_dfForEstimation$weight else NULL
          ) 
        }
      openVersusAvgClosed_lm1 <- svyglm(
        correct_fullQuestionsOnly ~ condition + question,  
        design      = openVersusAvgClosedDO,
        singular.ok = FALSE)
      
      diff_a <- coeftest(main_lm1)['condition_fullQuestionsOnly3:easy', 'Estimate'] - coeftest(main_lm1)['condition_fullQuestionsOnly5:hard', 'Estimate']
      diff_c <- coeftest(openVersusAvgClosed_lm1)['conditionclosed', 'Estimate']
      diff_a - diff_c
    }
    
    bootResult <- boot::boot(
      data      = main_dfForEstimation_subset, 
      statistic = bootFunc, 
      R         = 1000)
    CI_lm1 <- boot::boot.ci(bootResult, type = 'perc')
    CI_lm1
  }


  # "[E]xcluding these subjects [who passed the placebo-test question] changes 
  #  none of our treatment-effect estimates by more than 0.004."  
  if (ONLY_SUBJECTS_WHO_ANSWERED_CONTROL_QUESTIONS && !MODELS_WITH_CONTROLS) {
    numROs_dfForEstimation_noPlacebo <- numROs_dfForEstimation %>%
      filter(!placeboCorrect)
    diff_dfForEstimation_noPlacebo <- diff_dfForEstimation %>%
      filter(!placeboCorrect)
    main_dfForEstimation_noPlacebo <- main_dfForEstimation_subset %>%
      filter(!placeboCorrect)
    openVersusAvgClosed_dfForEstimation_noPlacebo <- openVersusAvgClosed_dfForEstimation %>%
      filter(!placeboCorrect)
    
    numROsDO_noPlacebo <- numROs_dfForEstimation_noPlacebo %>%  # DO = "design object"
    { svydesign(
        ids     = ~ID, 
        data    = ., 
        weights = if (USE_WEIGHTS) numROs_dfForEstimation_noPlacebo$weight else NULL
      ) 
    }
    diffDO_noPlacebo <- diff_dfForEstimation_noPlacebo %>%  # DO = "design object"
    { svydesign(
        ids     = ~ID, 
        data    = ., 
        weights = if (USE_WEIGHTS) diff_dfForEstimation_noPlacebo$weight else NULL
      ) 
    }
    mainDO_noPlacebo <- main_dfForEstimation_noPlacebo %>%  # DO = "design object"
    { svydesign(
        ids     = ~ID, 
        data    = ., 
        weights = if (USE_WEIGHTS) main_dfForEstimation_noPlacebo$weight else NULL
      ) 
    }
    openVersusAvgClosedDO_noPlacebo <- openVersusAvgClosed_dfForEstimation_noPlacebo %>%  # DO = "design object"
    { svydesign(
        ids     = ~ID, 
        data    = ., 
        weights = if (USE_WEIGHTS) openVersusAvgClosed_dfForEstimation_noPlacebo$weight else NULL
      ) 
    }
    
    numROs_lm1_noPlacebo              <- update(numROs_lm1,              design = numROsDO_noPlacebo)
    diff_lm1_noPlacebo                <- update(diff_lm1,                design = diffDO_noPlacebo)
    main_lm1_noPlacebo                <- update(main_lm1,                design = mainDO_noPlacebo)
    openVersusAvgClosed_lm1_noPlacebo <- update(openVersusAvgClosed_lm1, design = openVersusAvgClosedDO_noPlacebo)  
    
    regList_noPlacebo <- llist(
      numROs_lm1_noPlacebo, 
      diff_lm1_noPlacebo, 
      main_lm1_noPlacebo, 
      openVersusAvgClosed_lm1_noPlacebo)
    regResultsTable_noPlacebo <- regTable(
      regList_noPlacebo, 
      rowsToKeep = 'numROs|diffTF|condition|Intercept')
  
    rownames(regResultsTable_noPlacebo) <- rownames(regResultsTable)
    
    # COMPARE RESULTS
    regResultsTable - regResultsTable_noPlacebo    
  }


  # SCREENERS
  data$screenerSum[!is.na(data$weight)] %>%
    table() %>%
    prop.table()  # 82% answered at least one question correctly.


  # SHOW THAT WE LOSE FOUR CASES DUE TO MISSINGNESS ON PARTY ID
  # Our descriptive figures have N = 1,961 -- that is the number of subjects
  # for whom we had complete demographic info, and thus the number for whom we
  # could compute weights. But our regressions have N = 1,957. The code here
  # shows that we lose the four cases because there are four subjects for whom
  # we have weights but no party-ID info.  [2020 07 23]
  tmp <- data.frame(
    weights = data$weight, 
    PID     = data$PID,
    psid    = data$psid) 
  n_distinct(tmp$psid)  # 2080
  
  psid_short <- tmp %>% 
    na.omit() %T>%
    { print(n_distinct(.$psid)) } %>%  # 1957
    pull(psid)
  
  psid_long <- tmp %>% 
    select(-PID) %>%
    na.omit() %T>%
   { print(n_distinct(.$psid)) } %>%  # 1961 
    pull(psid)
  
  # psids of subjects who have weights but no values for PID
  psid_targets <- psid_long[which(! psid_long %in% psid_short)] %>% unique  # 4 values
  
  # PID values for these four subjects
  data$PID[data$psid %in% psid_targets]       # all NA
  data$PID_stem[data$psid %in% psid_targets]  # all NA
  originalData$Q24.2[originalData$psid %in% psid_targets]             # "root" question -- all -99
  originalData$Q24.5[originalData$psid %in% psid_targets]  %>% table  # "stem" question -- all blank
}